
/*****************************************************************************\
 *                              ColUtils
\*****************************************************************************/

/*! @file 
 *
 *  @brief
 *    Utility functions for the CollDet library. Some of them are
 *    (hopefully) temporary only, untilthey become available in OpenSG.
 *
 *  @author Gabriel Zachmann
 *          Weiyu Yi, re-implemented in 2010, principally to eliminate the
 *          dependence upon the OpenSG,  so that it can be used more widely and
 *          easily.
 *
 *  @see
 *    The external access and the saving of data use the single precision of
 *    floating number, while the internal calculation, ie. constructing, uses
 *    double precision.
 *
 *  
 */

//---------------------------------------------------------------------------
//  Includes
//---------------------------------------------------------------------------

#include <stdlib.h>
#include <time.h>
#include <math.h>

#ifdef __sgi
#	include <sys/sysmp.h>
#	include <sys/pda.h>
#endif

#if defined(__sgi) || defined(__linux)
#	include <sys/times.h>
#	include <sys/time.h>
#endif

#ifndef _WIN32
#include <unistd.h>
#include <pthread.h>
#include <alloca.h>
#include <sys/resource.h>
#endif

#define COL_EXPORT

#include <ColUtils.h>
#include <ColTopology.h>
#include <ColExceptions.h>
#include <ColDefs.h>

namespace col {


// --------------------------------------------------------------------------
/** @name               Vector, Matrix, and Transformation Math
 */
// @{


/**  Several 'vector * vector' and 'vector * point' products
 *
 * @return
 *   The dot product.
 *
 * These operators are (hopefully) only temporary functions,
 * until available from OpenSG.
 *
 * The 4-th component of vec4 is ignored!
 *
 **/

REAL operator*( const Point3 &pnt, const REAL vec[3] )
{
	return vec[0]*pnt[0] + vec[1]*pnt[1] + vec[2]*pnt[2];
}

REAL operator*( const Vector4 &vec4, const Point3 &pnt3 )
{
	return vec4[0]*pnt3[0] + vec4[1]*pnt3[1] + vec4[2]*pnt3[2];
}

REAL operator*( const Point3 &pnt3, const Vector3 &vec3 )
{
	return vec3[0]*pnt3[0] + vec3[1]*pnt3[1] + vec3[2]*pnt3[2];
}

REAL operator*( const Vector3 &vec3, const Point3 &pnt3 )
{
	return vec3[0]*pnt3[0] + vec3[1]*pnt3[1] + vec3[2]*pnt3[2];
}

REAL   operator*( const Vector3  &vec3, const Vector4  &vec4 )
{
	return vec3[0]*vec4[0] + vec3[1]*vec4[1] + vec3[2]*vec4[2];
}

///
REAL   dot3( const Vector4  &vec1, const Vector4  &vec2 )
{
	return vec1[0]*vec2[0] + vec1[1]*vec2[1] + vec1[2]*vec2[2];
}

REAL   dot4( const Vector4  &vec1, const Vector4  &vec2 )
{
	return vec1[0]*vec2[0] + vec1[1]*vec2[1] + vec1[2]*vec2[2] + vec1[3]*vec2[3];
}


/**  Square distance between 2 points
 *
 * @param pnt1	point
 * @param pnt2	point
 *
 * @return
 *   The squared distance.
 *
 * This is (hopefully) only a temporary function, until available from OSG.
 **/
REAL dist2( const Point3 &pnt1, const Point3 &pnt2 )
{
#	define sqr(x) ((x)*(x))

	return
		sqr(pnt1[0]-pnt2[0]) +
		sqr(pnt1[1]-pnt2[1]) +
		sqr(pnt1[2]-pnt2[2]);

#	undef sqr
}


/**  Average of an array of points
 *
 * @param points 	the array
 * @param npoints	number of points
 *
 * @return
 *   The average.
 **/
Point3 barycenter( const Point3 *points, const unsigned int npoints )
{
	Point3 c;
	for ( unsigned int i = 0; i < npoints; i ++ )
		for ( unsigned int j = 0; j < 3; j ++ )
			c[j] += points[i][j];
	for ( unsigned int j = 0; j < 3; j ++ )
		c[j] /= npoints;
	return c;
}

/** @overload
 */
Point3 barycenter( const Pnt3Array& points )
{
	Point3 c;
	for ( unsigned int i = 0; i < points.size(); i ++ )
		for ( unsigned int j = 0; j < 3; j ++ )
			c[j] += points[i][j];
	for ( unsigned int j = 0; j < 3; j ++ )
		c[j] /= points.size();
	return c;
}

/**  Average of an array of indexed points
 *
 * @param points 				the array
 * @param index,nindices		array of indices into points
 *
 * @return
 *   The average over all points[ index[i] ], i = 0 .. nindices-1.
 *
 * @warning
 *   No range check on indices will done, i.e., indices pointing outside
 *   points[] will produce garbage or an FPE!
 **/
Point3 barycenter( const Point3* points,
				  const unsigned int index[], const unsigned int nindices )
{
	Point3 c;
	for ( unsigned int i = 0; i < nindices; i ++ )
		for ( unsigned int j = 0; j < 3; j ++ )
			c[j] += points[ index[i] ][j];
	for ( unsigned int j = 0; j < 3; j ++ )
		c[j] /= nindices;
	return c;
}


/** @overload
 */
Point3 barycenter( const Pnt3Array* points,
				  const unsigned int index[], const unsigned int nindices )
{
	Point3 c;
	for ( unsigned int i = 0; i < nindices; i ++ )
		for ( unsigned int j = 0; j < 3; j ++ )
			c[j] += (*points)[ index[i] ][j];
	for ( unsigned int j = 0; j < 3; j ++ )
		c[j] /= nindices;
	return c;
}

/** @overload
 */
Point3 barycenter( const Pnt3Array& points, const TopoFace& face )
{
	Point3 c;
	for ( unsigned int i = 0; i < face.size(); i ++ )
		for ( unsigned int j = 0; j < 3; j ++ )
		{
				c[j] += points[ face[i] ] [j];
		}
	for ( unsigned int j = 0; j < 3; j ++ )
		c[j] /= face.size();
	return c;
}


/**  Test if two vectors are collinear
 *
 * @param a,b		the vectors
 *
 * @return
 *   true if collinear, false otherwise.
 *
 * Check wether or not a = l * b, l!=0.
 * A 0 vector is not considered collinear with any other vector
 * (if both a and b are 0, they are still considered @e not collinear).
 *
 * This is (hopefully) only a temporary function, until available from OSG.
 **/
bool collinear( const Vector3 &a, const Vector3 &b )
{
	REAL l;
	int x;

	// is b == 0?
	x = 0;
	for ( unsigned int i = 1; i < 3; i ++ )
		if ( fabs(b[x]) < fabs(b[i]) )
			x = i;
	if ( fabs(b[x]) < NearZero )
		 return false;
	
	l = a[x] / b[x];
	if ( l < NearZero && l > -NearZero )
		return false;

	return
		fabs(a[0] - l*b[0]) < NearZero &&
		fabs(a[1] - l*b[1]) < NearZero &&
		fabs(a[2] - l*b[2]) < NearZero;
}


/**  Test if two triangles (planes / polygons) are coplanar
 *
 * @param p0,p1,p2	first triangle / plane / polygon
 * @param q0,q1,q2  second ...
 *
 * @return
 *   true if colplanar, false otherwise.
 *
 * Check wether the two planes given by the 2x3 sets of points are coplanar.
 * If the two sets of points are from two different polygons, then this
 * function returns true, if both polygons lie in the same plane.
 *
 * @pre
 *   At least one of the two triangles should yield a normal unequal 0.
 *
 * @warning
 *   Not optimized.
 **/

/**  Matrix * Pnt3f
 *
 * @param m		matrix
 * @param p		point
 *
 * @return
 *   A point := m(3x3) * p .
 *
 * Only the upper left 3x3 part (rotation) of m is considered.
 * This is equivalent to making the translation of m = 0.
 *
 * This is (hopefully) only a temporary function, until available from OSG.
 **/
Point3  mulM3Pnt( const Matrix4  &m, const Point3  &p )
{
	return Point3( p[0] * m[0][0] + p[1] * m[1][0] + p[2] * m[2][0],
				   p[0] * m[0][1] + p[1] * m[1][1] + p[2] * m[2][1],
				   p[0] * m[0][2] + p[1] * m[1][2] + p[2] * m[2][2] );
}


/**  Dominant coord plane which v is "most orthogonal" to
 *
 * @param v		vector
 * @param x,y	indices of "most orthogonal" plane (out)
 *
 * Compute @a x and @a y, such that
 * \f$ \min(v_i) \leq v_x \; \wedge \; \min(v_i) \leq v_y \f$.
 **/
void dominantIndices( const Vector3& v, unsigned int* x, unsigned int* y )
{
	if ( fabs(v[0]) < fabs(v[1]) )
		if ( fabs(v[1]) < fabs(v[2]) )
			*x = 0,  *y = 1;
		else
			*x = 0,  *y = 2;
	else
		if ( fabs(v[0]) < fabs(v[2]) )
			*x = 0,  *y = 1;
		else
			*x = 1,  *y = 2;
}


/** @overload
 *
 * @param v     Vector
 * @param x
 * @param y
 * @param z		index of smallest vector coord (out)
 *
 **/
void dominantIndices( const Vector3& v,
					  unsigned int* x, unsigned int* y, unsigned int* z )
{
	if ( fabs(v[0]) < fabs(v[1]) )
		if ( fabs(v[1]) < fabs(v[2]) )
			*x = 0,  *y = 1, *z = 2;
		else
			*x = 0,  *y = 2, *z = 1;
	else
		if ( fabs(v[0]) < fabs(v[2]) )
			*x = 0,  *y = 1, *z = 2;
		else
			*x = 1,  *y = 2, *z = 0;
}


/**  Dominant coord axis which v is "most parallel" to
 *
 * @param v	vector
 * @return
 *   Index of maximum coordinate of @a v, such that
 *   \f$ v_x \geq \max(v_i) \f$.
 **/
unsigned int dominantIndex( const Vector3& v )
{
	if ( fabs(v[0]) < fabs(v[1]) )
		if ( fabs(v[1]) < fabs(v[2]) )
			return 2;
		else
			return 1;
	else
		if ( fabs(v[0]) < fabs(v[2]) )
			return 2;
		else
			return 0;
}


/**  Normal of a triangle defined by 3 points
 *
 * @param p0,p1,p2		the triangle
 *
 * @return
 *   The normal (p1-p0) x (p2-p0).
 **/
Vector3 triangleNormal( const Point3& p0, const Point3& p1, const Point3& p2 )
{
	Vector3 result = p1 - p0;
	result = cross( result, p2 - p0 );
	return result;
}


// --------------------------------------------------------------------------
// @}
/** @name                   Geometry
 */
// @{


// --------------------------------------------------------------------------
// @}
/** @name                   Timers, timing, sleeping, etc.
 */
// @{


/**  Sleep n microseconds
 *
 * @param microseconds		the sleep time
 *
 * Sleeps a number of microseconds.
 *
 * Under Windoze, this will sleep floor( @a microseconds / 1000 ) milliseconds.
 * If this amounts to 0 milliseconds, the thread will just relinquish its
 * time slice.
 *
 * @todo
 * - Funktion suchen, die Mikrosekunden kann.
 *
 * @bug
 *   On most platforms (Windows, Linux, single-CPU SGI), this function will
 *   sleep at least 10 millliseconds!
 *   (On Linux, usleep and nanosleep don't work as advertised, as of RedHat 7.2)
 **/

void sleep( unsigned int microseconds )
{
#ifdef _WIN32
	Sleep( microseconds / 1000 );
#else

	usleep( microseconds );

#endif // WIN32
}



/**  Get the user time in milliseconds
 *
 * @return
 *   Time in milliseconds.
 *
 * The time is the user time of the process (and all children) since the
 * process started.
 *
 * @implementation
 *   Uses @c times under Unix, and @c GetProcessTimes under Windoze.
 *   Warning: Under Windoze, clock() returns wall-clock time, *not*
 *   user time! (contrary to what the MSDN documentation says!)
 *
 **/

REAL time( void )
{
#ifdef _WIN32

	unsigned long long utime;
	FILETIME creation_time, exit_time, kernel_time, user_time;
	BOOL success = GetProcessTimes( GetCurrentProcess(), &creation_time, &exit_time,
									&kernel_time, &user_time);
	if ( ! success )
		return 0.0;
	utime =   ( static_cast<unsigned long long>(user_time.dwHighDateTime) << 32) 
			+ static_cast<unsigned>(user_time.dwLowDateTime);
	return static_cast<float>( ( utime ) / 1.0e4 );

#else

//	struct tms t;
//	times( &t );
//	return static_cast<float>(t.tms_utime) / sysconf(_SC_CLK_TCK) * 1000.0;
	struct rusage ru;
	getrusage(RUSAGE_SELF, &ru);
	REAL f = ru.ru_utime.tv_sec * 1000.0 + ru.ru_utime.tv_usec / 1000.0;
//printf("times: %u - getrusage: %u.%06u - return: %f\n", t.tms_utime, ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, f);
	return f;
#endif
}





/** @class NanoTimer
 *
 * Timer with nanoseconds resolution.
 *
 * The units of this timer are always nanoseconds,
 * but on some platforms, the actual resolution might be less (microseconds or
 * even less).
 *
 * Where implemented, this class uses the high-speed, high-performance, hardware
 * timers/counters.
 * Otherwise, we just use @a gettimeofday(), which, at least on Linux/Pentium,
 * has microsecond resolution.
 *
 * Currently, the time is @e wall-clock time, not user time!
 *
 * @see
 * - http://www.ncsa.uiuc.edu/UserInfo/Resources/Hardware/IA32LinuxCluster/Doc/timing.html
 * - cedar.intel.com/software/idap/media/pdf/rdtscpm1.pdf
 * - /raphael/knowledge/programming/c/code-snippets/cpu_clock_timer.c
 *
 * @todo
 * - Try to estimate the overhead of a function call to start()
 *   or elapsed().
 * - Use PAPI (http://icl.cs.utk.edu/projects/papi/)
 *   or the "high resolution timers project"
 *   (http://high-res-timers.sourceforge.net/)
 *   when they become widely available (without kernel patches).
 *
 **/


/**  Create a new timer with nanoseconds resolution
 *
 * Saves the current time stamp, too.
 *
 * @sideeffects
 *   See @a checkFrequency().
 *
 **/

NanoTimer::NanoTimer(void)
{
#if defined(_WIN32)
    
	if ( ! M_FrequencyChecked )
		checkFrequency();
	start();

#else

    autoSetClockID();
    start();

#endif
}



/**  Save the current time (stamp) in the timer
 *
 **/

void NanoTimer::start( void )
{
#if defined(_WIN32)    
	m_time_stamp = getTimeStamp();
#else
    clock_gettime( m_clock_id, &m_time_stamp );
#endif
}


/**  Return the time elapsed since the last @a start() in nanoseconds.
 *
 **/
#if defined(_WIN32)

double NanoTimer::elapsed( void ) const
{
	unsigned long long int now = getTimeStamp();
	if ( M_Use_High_Frequ )
		return static_cast<double>(now - m_time_stamp) / M_GHz;
	else
		return static_cast<double>(now - m_time_stamp) * 1000.0;    
}

#else

long long unsigned int NanoTimer::elapsed( void ) const
{
    struct timespec now;
    clock_gettime( m_clock_id, &now );
    return now.tv_nsec - m_time_stamp.tv_nsec + (now.tv_sec-m_time_stamp.tv_sec) * 1000000000;
}

#endif
        

/**  Return current time stamp
 *
 * Should be used only internally by this class,
 * because the meaning of the return value is different depending on whether
 * @a M_Use_High_Frequ is true or not.
 *
 * @warning
 *   We don't check whether checkFrequency succeded!
 *   If it failed, the times will be bogus, or, in the worst case, the program
 *   might even crash.
 **/

#if defined(_WIN32)

long long unsigned int NanoTimer::getTimeStamp( void )
{
	LARGE_INTEGER stamp;
	QueryPerformanceCounter( &stamp );
	return stamp.QuadPart;
}

#else

long long unsigned int NanoTimer::getTimeStamp( void ) const
{
    struct timespec stamp;
    clock_gettime( m_clock_id, &stamp );
    return stamp.tv_sec*1000000000 + stamp.tv_nsec;
}

#endif


/**  Determine the clock frequency of the CPU
 *
 * Try to determine the speed at which the CPUs time-stamp counter is running
 * (on those plastforms where NanoTimer uses this counter),
 * and whether or not there is a high-frequency counter at all.
 *
 * @pre
 *   Assumes that all CPUs in one system run with the same clock frequency!
 *
 * @sideeffects
 *   Class variable M_GHz.
 *
 * @todo
 * - SGI
 *
 * @implementation
 *   Under Linux, we parse /proc/cpuinfo (if __pentiumpro was defined at compile
 *   time); under Windoze, we use QueryPerformanceFrequency.
 *
 * @see
 *   getTimeStamp(). The #ifdef's must match!
 * 
 **/

#if defined(_WIN32)
    
void NanoTimer::checkFrequency( void )
{
	M_GHz = 0.001;
	M_Use_High_Frequ = false;

	LARGE_INTEGER hz;
	BOOL use_high_frequ = QueryPerformanceFrequency( &hz );
	if ( ! use_high_frequ )
		fputs("col::NanoTimer: Can't use high-frequency counter!\n",stderr);
	else
	{
		M_GHz = hz.QuadPart / 1.0E9;
		M_Use_High_Frequ = true;
	}

	if ( M_Use_High_Frequ )
		printf("col::NanoTimer: found %lf GHz\n", M_GHz );
	else
	{
		puts("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
		puts("col::NanoTimer: will be bogus !!");
		puts("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
	}

	M_FrequencyChecked = true;
}

#else

void NanoTimer::setClockID( clockid_t clk )
{
    m_clock_id = clk;
}
        
    
void NanoTimer::autoSetClockID( void )
{
#if defined(_POSIX_THREAD_CPUTIME)
    pthread_getcpuclockid( pthread_self(), &m_clock_id );

#elif defined(_POSIX_CPUTIME)
    clock_getcpuclockid( getpid(), &m_clock_id );

#else
    m_clock_id = CLOCK_REALTIME;

#endif
    return;
}

#endif

#if defined(_WIN32)

/**  Tells whether or not the NanoTimer use the high frequency counter
 *
 * @warning
 *   Valid only after the first NanoTimer has been created!
 **/
bool NanoTimer::usesHighFrequ( void )
{
	return M_Use_High_Frequ;
}

/**  Returns the frequency (resolution) of the counter in GHz
 *
 * @warning
 *   Valid only if @a NanoTimer::usesHighFrequ() = @a true !
 **/
double NanoTimer::frequ( void )
{
	return M_GHz;
}


bool	NanoTimer::M_Use_High_Frequ		= false;
double	NanoTimer::M_GHz				= 0.001;
bool	NanoTimer::M_FrequencyChecked	= false;

#endif


// --------------------------------------------------------------------------
// @}
/** @name                   Random numbers
 */
// @{




/**  Substitute for the drand48() function under Unix (needed under Windoze)
 *
 * @return
 *   Pseudo-random number in the range [0.0 .. 1.0)
 *
 * The random number is generated from rand().
 *
 **/

double my_drand48( void )
{
	return rand() / static_cast<double>( RAND_MAX );
}



/**  Pseudo random number generator
 *
 * @return
 *   A number in the range [0,2147483646].
 *
 * Creates the same sequence of pseudo random numbers on every platform.
 * Use this instead of any random(), drand48(), etc., in regression test
 * programs.
 *
 * @todo
 * - Check that the seed is not the unique "bad" number as explained
 *   in @a http://home.t-online.de/home/mok-kong.shen/ .
 * - @a a should be a primitive root of @a c (see URL above).
 * - Groesseres @a c suchen.
 *
 * @bug
 *   Not multithread-safe.
 *
 * @see
 *   pseudo_randomf
 *
 * @implementation
 *   Based on a linear congruential relation x(new) = a * x(old) + b (mod c).
 *   If you change @a c, you @e must change pseudo_randomf!
 *   Source: @a http://home.t-online.de/home/mok-kong.shen/ .
 *
 **/

unsigned int pseudo_random( void )
{
	static unsigned long long int x_old = 13;
	static unsigned long long int c = 2147483647;
	static unsigned long long int b = 3131747;
	static unsigned long long int a = 79;

	x_old = a * x_old + b;
	x_old = x_old % c;

	return static_cast<unsigned int>( x_old );
}



/**  Pseudo random number generator
 *
 * @return
 *   A number in the range [0,1).
 *
 * Creates the same sequence of pseudo random numbers on every platform.
 * Use this instead of any random(), drand48(), etc., in regression test
 * programs.
 *
 * @see
 *   pseudo_random
 *
 **/

REAL pseudo_randomf( void )
{
	return static_cast<REAL>( pseudo_random() ) /
		   static_cast<REAL>( 2147483647 );
}



/** @class FibRand
 *
 * Lagged Fibonacci random sequence
 *
 * The constructor creates an object, from which you can retrieve random
 * numbers by any of the functions FibRand::rand(), FibRand::mrand(), and
 * FibRand::frand().
 * fills a field of 100 random numbers, which can be retrieved
 *
 * @bug
 *   The range has not really been checked/verified.
 *
 * @implementation
 *   Nach Knuth TAOCP Vol.2 pp.186f.
 *   Internally, a FibRand object stores 100 random numbers, which are then
 *   doled out by one of the "getters". After that, a new set of 100 numbers is
 *   generated. Hoefully, this yields better performance than producing each
 *   random number on demand.
 *
 **/


#define mod_diff(x,y) ( (x) - (y) & (M_MM-1) )
#define is_odd(x)     ( (x) & 1 )
#define evenize(x)    ( (x) & (M_MM-2) )


FibRand::FibRand( int seed )
{

	int t, j;
	int x[M_KK+M_KK-1];
	seed *= M_HashConst;
	int ss = evenize(seed+2);
	for (j=0;j<M_KK;j++) {
		x[j]=ss;
		ss<<=1;
		if (ss>=M_MM)
			ss-=M_MM-2;
	}
	for (;j<M_KK+M_KK-1;j++)
		x[j]=0;
	x[1]++;
	ss = seed & (M_MM-1);
	t = M_TT-1;
	while (t)
	{
		for (j=M_KK-1; j>0; j--)
			x[j+j]=x[j];
		for (j=M_KK+M_KK-2; j>M_KK-M_LL; j-=2)
			x[M_KK+M_KK-1-j]=evenize(x[j]);
		for (j=M_KK+M_KK-2; j>=M_KK; j--)
			if (is_odd(x[j]))
			{
				x[j-(M_KK-M_LL)]=mod_diff(x[j-(M_KK-M_LL)],x[j]);
				x[j-M_KK]=mod_diff(x[j-M_KK],x[j]);
			}
		if(is_odd(ss))
		{
			for (j=M_KK;j>0;j--) x[j]=x[j-1];
			x[0]=x[M_KK];
			if (is_odd(x[M_KK])) x[M_LL]=mod_diff(x[M_LL],x[M_KK]);
		}
		if (ss)
			ss>>=1;
		else
			t--;
	}
	for (j=0;j<M_LL;j++)
		m_buf[j+M_KK-M_LL]=x[j];
	for (;j<M_KK;j++)
		m_buf[j-M_LL]=x[j];  

	refresh();
}



void FibRand::refresh(void)
{
	int i, j;
	for (j=M_KK;j<M_BufSize;j++) 
		m_buf[j]=mod_diff(m_buf[j-M_KK],m_buf[j-M_LL]);
	for (i=0;i<M_LL;i++,j++)
		m_buf[i] = mod_diff(m_buf[j-M_KK],m_buf[j-M_LL]);
	for (;i<M_KK;i++,j++)
		m_buf[i] = mod_diff(m_buf[j-M_KK],m_buf[i-M_LL]);

	m_current_idx=0;
}


#undef evenize
#undef mod_diff
#undef is_odd


/**  Returns a pseudo random integer in the range [0,FibRand::M_MaxRand)
 *
 * @see
 *   FibRand
 *
 **/

unsigned int FibRand::rand( void )
{
	if ( m_current_idx >= M_BufSize )
		refresh();

	return m_buf[m_current_idx++];
}


/**  Returns a pseudo random integer in the range [0,m)
 *
 * @see
 *   FibRand
 *
 **/

unsigned int FibRand::mrand( unsigned int m )
{
	return FibRand::rand() % m;
}


/**  Returns a pseudo random REAL in the range [0,1)
 *
 * @see
 *   FibRand
 *
 **/

REAL FibRand::frand( void )
{
	return static_cast<float>( FibRand::rand() ) /
									static_cast<float>( M_MaxRand );
}




// --------------------------------------------------------------------------
// @}
/** @name                   Floating-Point Tricks
 */
// @{



/**  Returns 0 if x < 0, 0x80000000 otherwise
 *
 * The test 'if ( sign(x) )' seems to be a little bit faster than 
 * 'if ( x < 0.0f )'.
 * 
 * For doubles and int's, use floating-point comparison instead of 
 * this function, because that's faster (see ifcomp.c).
 *
 **/

unsigned int sign( float& x )
{
	return reinterpret_cast<unsigned int &>(x) & 0x80000000;
}

/**  @overload
 **/
unsigned int sign( int x )
{
	return x < 0;
}

/**  @overload
 **/
unsigned int sign( double x )
{
	return x < 0;
}

// BOOST_STATIC_ASSERT( sizeof(float) == 4 );


// --------------------------------------------------------------------------
// @}
/** @name                   Misc
 */
// @{



/**  Lock the calling process to a certain processor
 *
 * @param processor		the processor number
 *
 * A warning is printed if the processor is not isolated (or not enabled).
 *
 * @return
 *   False if locking failed, true otherwise.
 *   Locking can fail if we run on a single-processor machine, or
 *   @a processor is out of bounds.
 *
 * @pre
 *   @a Processor >= 0.
 *
 * @todo
 *   Implement for Windows.
 *
 **/

bool lockToProcessor( unsigned int processor )
{
#ifndef __sgi
	fprintf(stderr, "col:lockToProcessor(%u): not yet implemented on non-sgi"
			"platforms!\n", processor );
	return false;
#else

	// check number of procs
	int nprocs = sysmp(MP_NPROCS);
	if ( nprocs < 2 )
	{
		fputs("col:lockToProcessor: single processor machine -"
			  "locking is useless!\n",stderr);
		return false;
	}
	if ( nprocs > 100000 )
	{
		fprintf(stderr,"col:lockToProcessor: sysmp(MP_NPROCS) = %d seems"
				" bogus!\n", nprocs );
		return false;
	}

	if ( processor >= nprocs )
	{
		fprintf(stderr,"col:lockToProcessor: system has only 0..%d "
				"processors,\n  but you requested number %d!\n",
				nprocs-1, processor );
		return false;
	}

	// get status of procs
	struct pda_stat *pstat =
	   static_cast<struct pda_stat *>( alloca(nprocs*sizeof(struct pda_stat)) );
	int err;
	err = sysmp( MP_STAT, pstat );

	if ( err < 0 )
	{
		perror("col:lockToProcessor: sysmp(MP_STAT)");
		return false;
	}

	if ( ! (pstat[processor].p_flags & PDAF_ISOLATED) ||
		 (pstat[processor].p_flags & PDAF_ENABLED)     )
	{
		fprintf(stderr,"col:lockToProcessor: processor %d is not isolated!\n",
				processor );
	}

	err = sysmp( MP_MUSTRUN, processor );
	if ( err < 0 )
	{
		perror("col:lockToProcessor: sysmp(MP_MUSTRUN)");
		return false;
	}

	return true;

#endif // __sgi
}



// --------------------------------------------------------------------------
// @}
/** @name                   Intersection Tests
 */
// @{


/** Checks whether two coplanar triangles intersect.
 *
 *	@param normalV 				normal vector of plane, in which both triangles must lie
 *	@param polyVv0,..,polyVv2	vertices of first triangle (called 'V')
 *	@param polyUv0,..,polyUv2	vertices of second triangle (called 'U')
 *
 *  @return
 *	  true, if the triangles intersect, false otherwise
 *
 *  @pre
 *	  The triangles are coplanar and normalV is plane's normal vector.
 *	  Both triangles are not degenerated, i.e. all their vertices differ.
 *
 *	Checks, if the two coplanar triangles intersect.  Algorithm by Tomas
 *	Moeller, see comment of intersectTriangle for details.
 *
 **/
bool isectCoplanarTriangles( const Vector3f &normalV,
							 const Point3f  &polyVv0, const Point3f &polyVv1,
							 const Point3f  &polyVv2,
							 const Point3f  &polyUv0, const Point3f &polyUv1,
							 const Point3f  &polyUv2 )
{
	unsigned int i,j;	// indices, used for communication with the macros

	// This edge to edge test is based on Franlin Antonio's gem:
	// "Faster Line Segment Intersection" in Graphics Gems III, pp. 199-202

#   define COL_EDGE_EDGE(__V0, __U0, __U1)                                  \
        _Bx = __U0 [i] - __U1 [i];                                          \
        _By = __U0 [j] - __U1 [j];                                          \
        _Cx = __V0 [i] - __U0 [i];                                          \
        _Cy = __V0 [j] - __U0 [j];                                          \
        _FF = _Ay * _Bx - _Ax * _By;                                        \
        _DD = _By * _Cx - _Bx * _Cy;                                        \
        if((_FF > 0 && _DD >= 0 && _DD <= _FF) ||                           \
           (_FF < 0 && _DD <= 0 && _DD >= _FF))                             \
        {                                                                   \
            _EE = _Ax * _Cy - _Ay * _Cx;                                    \
            if(_FF > 0)                                                     \
            {                                                               \
                if(_EE >= 0 && _EE <= _FF) return true;                     \
            }                                                               \
            else                                                            \
            {                                                               \
                if(_EE <= 0 && _EE >= _FF) return true;                     \
            }                                                               \
        }


	// Check single edge against the three edges of a triangle
	// Assumptions: Triangle and edge lie in the same plane
	// Variables i and j are indices computed in isectCoplanarTriangles

#   define COL_EDGE_AGAINST_TRI(_V0, _V1, _U0, _U1, _U2)                    \
        {                                                                   \
                                    /* temporary variables, also for    */  \
                                    /* interaction with COL_EDGE_EDGE   */  \
            REAL _Ax, _Ay, _Bx, _By, _Cx, _Cy, _EE, _DD, _FF;              \
            _Ax = _V1 [i] - _V0 [i];                                        \
            _Ay = _V1 [j] - _V0 [j];                                        \
                                    /* test edge U0,U1 against V0,V1    */  \
            COL_EDGE_EDGE(_V0, _U0, _U1);                                   \
                                    /* test edge U1,U2 against V0,V1    */  \
            COL_EDGE_EDGE(_V0, _U1, _U2);                                   \
                                    /* test edge U2,U1 against V0,V1    */  \
            COL_EDGE_EDGE(_V0, _U2, _U0);                                   \
        }


	//  first project onto an axis-aligned plane, that maximizes the area of
	//  the triangles, compute indices i, j
	dominantIndices(normalV, &i, &j);

	// test all edges of triangle V against	the edges of triangle U
	COL_EDGE_AGAINST_TRI(polyVv0, polyVv1, polyUv0, polyUv1, polyUv2);
	COL_EDGE_AGAINST_TRI(polyVv1, polyVv2, polyUv0, polyUv1, polyUv2);
	COL_EDGE_AGAINST_TRI(polyVv2, polyVv0, polyUv0, polyUv1, polyUv2);

	// finally, test if triangle V is
	// totally contained in triangle U and
	// vice versa

	return ( pointInTriangle(polyVv0, polyUv0, polyUv1, polyUv2, i, j) ||
			 pointInTriangle(polyUv0, polyVv0, polyVv1, polyVv2, i, j));

}

/** Checks if the edges intersect in 2D.
 *
 *	@param	v0V,v1V  vertices of first edge
 *  @param  u0V,u1V  vertices of second edge
 *	@param	x,y  indices (in {0,1,2}) to dominant plane
 *
 *  @return
 *		true, if the edges intersect, 
 *		false otherwise.
 *
 * 	This edge to edge test is based on Franlin Antonio's gem: "Faster Line
 * 	Segment Intersection" in Graphics Gems III, pp. 199-202.
 *
 *  @pre
 *		@a v0, @a v1, @a u0 and @a u1 describe valid non-degenerated line
 *		segments. Both line segments are coplanar.
 **/
bool isectCoplanarEdges( const Point3f &v0V, const Point3f &v1V,
						 const Point3f &u0V, const Point3f &u1V,
						 unsigned int x, unsigned int y )
{
	REAL axF, ayF, bxF, byF, cxF, cyF, dF, eF, fF;

	axF = v1V[x] - v0V[x];
	ayF = v1V[y] - v0V[y];

	bxF = u0V[x] - u1V[x];
	byF = u0V[y] - u1V[y];

	cxF = v0V[x] - u0V[x];
	cyF = v0V[y] - u0V[y];

	fF = ayF * bxF - axF * byF;
	dF = byF * cxF - bxF * cyF;

	if ( (fF > 0 && dF >= 0 && dF <= fF) ||
		 (fF < 0 && dF <= 0 && dF >= fF) )
	{
		eF = axF * cyF - ayF * cxF;
		if ( fF > 0)
		{
			if ( eF >= 0 && eF <= fF )
				return true;
		}
		else
		{
			if ( eF <= 0 && eF >= fF )
				return true;
		}
	}

	return false;
}


/** Checks, if edge intersects polygon in 2D.
 *
 *	@param v1,v2  vertices of a line segment edge
 *	@param poly  vertices of an arbitrary polygon
 *  @param plSize size of poly
 *  @param normalV	normal
 *	@param x,y		indices (in {0,1,2}) to dominant plane
 *	@param isect	set if edge intersects the polygon (out)
 *	@param oneside	set of edge is completely on one side of the plane (out)
 *
 *	Checks, if the edge (v1,v2) intersects the polygon.
 *  There are three output cases:
 *  -# isect=true is obvious;
 *  -# isect=false and oneside=false means that the edge intersects the plane
 *        of the polygon but not the polygon itself;
 *  -# isect=false and oneside=true means that the edge is completely on one
 *        side of the plane of the polygon.
 *
 *  If both edge and polygon are parallel (or coplanar), then case 2 cannot
 *  happen.
 *
 *  @pre
 *		@a v1 and @a v2 describe a valid line segment, @a poly contains at
 *		least 3 elements, the elements in @a poly are all in the same plane
 *		and define a valid polygon.
 *
 *  @todo
 *    Schleife ueber intersectCoplanarEdges koennte optimiert werden.
 *
 *	@implementation
 *	  Function edgePolygon in arbcoll.c.
 *	  The original function did not handle the coplanar case, this
 *	  implementation does.
 **/
void isectEdgeTriangle( const Point3f &v1, const Point3f &v2,
						const Point3f *poly, const Vector3f &normalV,
						unsigned int x, unsigned int y,
						bool *isect, bool *oneside )
{
	Vector3f w;
	Point3f pt;
	REAL s, t;

	*isect = false;

	w = v2 - v1;
	//t = normalV * w;
	t = dot( normalV, w );
	//s = normalV * (poly[0] - v1);
	s = dot( normalV, poly[0]-v1 );

	if ( fabs( static_cast<float>( t ) ) < NearZero )
	{
		// check whether line segment and triangle plane are parallel or
		// coplanar [ normalV * (v1 - poly[0]) = 0 ]

		if ( fabs( static_cast<float>( s ) ) >= NearZero )
		{
			// parallel
			*oneside = true;
			return;
		}
		*oneside = false;

		// coplanar, do sequence of edge-edge/checks in 2D
		pt = poly[2];

		for ( unsigned int i = 0; i < 3; i++ )
		{
			if (isectCoplanarEdges(v1, v2, pt, poly[i], x, y))
			{
				*isect = true;
				return;
			}
			pt = poly[i];
		}

		// finally check if segment is totally contained in polygon
		*isect = pointInPolygon( v1, poly, 3, x, y );
		return;
	}

	// post cond.: neither parallel nor coplanar

	t = s / t;		// this div could be deferred, but doesn't gain anything
	if ( t < 0.0 || t > 1.0 )
	{
		// edge is completely on one side
		*oneside = true;
		return;
	}
	*oneside = false;

	//pt = v1 + w * static_cast<osg::Real32>( t );						// Compute point of intersection
	pt = v1 + w * static_cast<REAL>( t );						// Compute point of intersection
	*isect = pointInPolygon(pt, poly, 3, x, y);
	return;
}

/** Check if point is inside polygon
 *
 *  @param pt  the point to be tested (must be in plane of polygon)
 *  @param x,y  indices (in {0,1,2}) to dominant plane
 *  @param poly  polygon vertices
 *  @param plSize size of poly
 *
 *  @return
 *    true, if point is inside polygon, false otherwise.
 *
 *  Check if point @a pt is inside the closed polygon given by @a poly. @a pt
 *  and the vertices are 3D points, but the whole check is done with @a pt and
 *  @a poly projected onto the plane @a x/y, where @a x and @a y in {0,1,2}.
 *
 *  @pre
 *    The vertices are assumed to define a closed polygon.
 *    pt is assumed to be in the supporting plane of the polygon.
 *
 *  @implementation
 *    This is the original pointInPolygon from Y (arbcoll.c).
 *
 *  @todo
 *    Fuer Dreiecke und Vierecke optimieren!
 **/
bool pointInPolygon( const Point3f &pt,
					 const Point3f *poly, unsigned int plSize,
					 unsigned int x, unsigned int y )
{
	REAL px, py;
	REAL v1x, v1y, v2x, v2y, x1px, x2px, y1py, y2py;
	//Pnt3f v1, v2, v3;
	Point3f v1, v2, v3;
	REAL y1y2;
	REAL t;
	unsigned int in;

#	define COL_RAY_EDGE_2( succaction )								\
	{																\
		if ( v1x < px && v2x < px )									\
			succaction;												\
		v1y = v1[y];												\
		v2y = v2[y];												\
		if ( v1y > py && v2y > py )									\
			succaction;												\
		if ( v1y <= py && v2y <= py )								\
			succaction;												\
		if ( v1x >= px && v2x >= px )								\
		{															\
			in ++ ;													\
			succaction;												\
		}															\
		y1py = v1y - py;											\
		x1px = v1x - px;											\
		y2py = v2y - py;											\
		x2px = v2x - px;											\
		if ( y1py >= x1px && y2py >= x2px )							\
			succaction;												\
		if ( (-y1py) >= x1px && (-y2py) >= x2px )					\
			succaction;												\
																	\
		y1y2 = v1y - v2y;											\
		t = y1py*(v2x - v1x) + x1px*y1y2;							\
		if ( (t>0.0f && y1y2 > 0.0f) || (t<0.0f && y1y2 < 0.0f) )	\
			in ++ ;													\
    }

	px = pt[x];  py = pt[y];
	in = 0;

	if ( plSize == 3 )
	{
		// Perfomance-improvement propsal:
		// Do not copy Pnt3f itself,  instead copy Pnt3f*
		v1 = poly[0];
		v2 = poly[1];
		v1x = v1[x];
		v2x = v2[x];
		COL_RAY_EDGE_2( goto tedge2 );
tedge2:
		v3 = v2;
		v2 = poly[2];
		v2x = v2[x];
		COL_RAY_EDGE_2( goto tedge3 );
tedge3:
		v1 = v3;
		v1x = v1[x];
		COL_RAY_EDGE_2( goto fin );
		goto fin;
	}

	if ( plSize == 4 )
	{
		v1 = poly[0];
		v2 = poly[1];
		v1x = v1[x];
		v2x = v2[x];
		COL_RAY_EDGE_2( goto qedge2 );
qedge2:
		v3 = v1;
		v1 = poly[2];
		v1x = v1[x];
		COL_RAY_EDGE_2( goto qedge3 );
qedge3:
		v2 = poly[3];
		v2x = v2[x];
		COL_RAY_EDGE_2( goto qedge4 );
qedge4:
		v1 = v3;
		v1x = v1[x];
		COL_RAY_EDGE_2( goto fin );
		goto fin;
	}


	v2 = poly[plSize-1];
	for ( int i = plSize-2; i >= 0; v2 = v1, i-- )
	{
		v1 = poly[i];
		v1x = v1[x];
		v2x = v2[x];
		COL_RAY_EDGE_2( continue );
	}

	// wrap-around edge
	v2 = poly[plSize-1];
	v2x = v2[x];
	// v1x = poly[0][x], already
	COL_RAY_EDGE_2( goto fin );

fin:
	return static_cast<bool>(in & 1);
#undef COL_RAY_EDGE_2
}


/** Check whether point is inside triangle
 *  @param pt point
 *  @param v0,v1,v2 vertices of triangle
 *  @param x,y  indices (in {0,1,2}) to dominant plane
 */
bool pointInTriangle( const Point3f &pt,
					  const Point3f &v0, const Point3f &v1, const Point3f &v2,
					  unsigned int x, unsigned int y )
{
	REAL aa, bb, cc, d0, d1, d2;

	aa = v1[y] - v0[y];
	bb = -(v1[x] - v0[x]);
	cc = -aa * v0[x] - bb * v0[y];
	d0 = aa * pt[x] + bb * pt[y] + cc;

	aa = v2[y] - v1[y];
	bb = -(v2[x] - v1[x]);
	cc = -aa * v1[x] - bb * v1[y];
	d1 = aa * pt[x] + bb * pt[y] + cc;

	aa = v0[y] - v2[y];
	bb = -(v0[x] - v2[x]);
	cc = -aa * v2[x] - bb * v2[y];
	d2 = aa * pt[x] + bb * pt[y] + cc;

	return (d0 * d1 > 0.0) && (d0 * d2 > 0.0);
}


// --------------------------------------------------------------------------
// @}

void print( const Vector3 &vec, FILE *file )
{
	REAL v0 = vec[0];
	REAL v1 = vec[1];
	REAL v2 = vec[2];
	fprintf( file, "[% 8f % 8f % 8f]", v0, v1, v2 );
	//fprintf( file, "[% 8f % 8f % 8f]", vec[0], vec[1], vec[2] );
}

void print( const Vector4 &vec, FILE *file )
{
	REAL v0 = vec[0];
	REAL v1 = vec[1];
	REAL v2 = vec[2];
	REAL v3 = vec[3];
	fprintf( file, "[% 8f % 8f % 8f % 8f]", v0, v1, v2, v3 );
	//fprintf( file, "[% 8f % 8f % 8f % 8f]", vec[0], vec[1], vec[2], vec[3] );
}

void print( const Point3 &pnt, FILE *file )
{
	REAL p0 = pnt[0];
	REAL p1 = pnt[1];
	REAL p2 = pnt[2];
	fprintf( file, "[% 8f % 8f % 8f]", p0, p1, p2 );
	//fprintf( file, "[% 8f % 8f % 8f]", pnt[0], pnt[1], pnt[2] );
}

void print( const Matrix4 &mat, FILE *file )
{
	print( mat.getRow(0), file );
	fprintf( file, "\n" );
	print( mat.getRow(1), file );
	fprintf( file, "\n" );
	print( mat.getRow(2), file );
	fprintf( file, "\n" );
	print( mat.getRow(3), file );
	fprintf( file, "\n" );
}

bool equal( const Matrix4 &mat1, const Matrix4 &mat2, REAL diff )
{
	Matrix4 m = mat1 - mat2;

	for ( unsigned int i=0; i<4; ++ i )
	{
		for ( unsigned int j=0; j<4; ++ j )
		{
			if ( abs( m[i][j] ) > diff ) return false;
		}
	}
	return true;
}

bool equal( const Point3 &p1, const Point3 &p2, REAL diff )
{
	Vector3 p = p1 - p2;
	for ( unsigned int i=0; i<3; ++ i )
	{
		if ( abs( p[i] ) > diff ) return false;
	}
	return true;
}


/**  Affine combination of two points
 *
 * @param pnt1,pnt2		points
 *
 * @return
 *   @a pnt1*c1 + @a pnt2*c2.
 *
 * @pre
 *   @a c1 + @a c2 = 1!
 **/
Point3 lincomb( float c1, const Point3 &pnt1, float c2, const Point3 &pnt2 )
{
	Point3 result;
	for ( unsigned int i = 0; i < 3; i ++ )
		result[i] = c1*pnt1[i] + c2*pnt2[i];
	return result;
}


} // namespace col

